
library(readxl)
# Load the homicide workbook from the project's data folder.
homicide <- read_excel(
  path = "~/Documents/missouri_project/data/Homicide_Cov_Bor.xlsx"
)


library(dplyr)
library(car)

# Keep Missouri and the eight border states.
# A single %in% test against the list of names replaces a nine-way
# `==` / `|` chain; rows whose state is NA are dropped either way.
data_homicide <- homicide %>%
  filter(state %in% c(
    "Arkansas", "Illinois", "Iowa", "Kansas", "Kentucky",
    "Missouri", "Nebraska", "Oklahoma", "Tennessee"
  ))


# Drop every 2007 observation (rows with a missing year are dropped as well,
# because filter() only keeps rows where the condition is TRUE).
data_homicide <- filter(data_homicide, year != 2007)

# Indicator column: 1 for Missouri rows with year >= 2008, 0 for every other
# row. `missing = 0` keeps the flag at 0 if the comparison evaluates to NA.
data_homicide <- data_homicide %>%
  mutate(
    MO_2008ge = if_else(year >= 2008 & state == "Missouri", 1, 0, missing = 0)
  )

# Second data frame: the same rows restricted to year <= 2013
# (described by the author as the 2006, 2008-2013 data).
# This has to run before year is converted to a factor further down,
# while `<=` still compares the raw year values.
data_homicide2 <- filter(data_homicide, year <= 2013)

# Convert year and state to factors in both data frames so that they enter
# the models fitted later as categorical terms.
data_homicide <- data_homicide %>%
  mutate(year = as.factor(year), state = as.factor(state))

data_homicide2 <- data_homicide2 %>%
  mutate(year = as.factor(year), state = as.factor(state))

### Linear model for the 2006, 2008-2019 data: rate on state and year
### factors plus the Missouri-from-2008 indicator, with no intercept (`- 1`).
# NOTE(review): for a no-intercept fit, summary.lm() measures R-squared
# against zero rather than against the mean of rate -- read it with care.
fit1 <- lm(
  formula = rate ~ state + year + MO_2008ge - 1,
  data = data_homicide
)
summary(object = fit1)
# Analysis-of-variance table for each model term (car::Anova).
Anova(mod = fit1)

library(clubSandwich)
# Cluster-robust ("CR2") variance-covariance matrix for the coefficients of
# fit1, with observations clustered by state.
vcovCR(obj = fit1, cluster = data_homicide[["state"]], type = "CR2")

### Linear model for the 2006, 2008-2013 data: same specification as the
### full-period model, fitted to data_homicide2, with no intercept (`- 1`).
# NOTE(review): for a no-intercept fit, summary.lm() measures R-squared
# against zero rather than against the mean of rate -- read it with care.
fit2 <- lm(
  formula = rate ~ state + year + MO_2008ge - 1,
  data = data_homicide2
)
summary(object = fit2)
# Analysis-of-variance table for each model term (car::Anova).
Anova(mod = fit2)

# Cluster-robust ("CR2") variance-covariance matrix for the coefficients of
# fit2, with observations clustered by state.
vcovCR(obj = fit2, cluster = data_homicide2[["state"]], type = "CR2")




